## behavioral data:
d <- fread(here("in", "behavior-and-events_stroop_2021-10-20_nice.csv"))
subjs_behav <- as.data.table(table(d$subj, d$session, d$wave))
names(subjs_behav) <- c("subj", "session", "wave", "nrow_eprime")
# subjs_behav$is_missing <- subjs_behav$nrow_eprime == 0
# subjs_behav$is_incomplete <- subjs_behav$nrow_eprime < 216
## fmri data (list of subjs who have):
fnames_subjlist <- list.files(here("in/subjs"), pattern = "^subjlist", full.names = TRUE)
names(fnames_subjlist) <- gsub("^.*stroop-rsa-pc/in/subjs/subjlist_fmri_(.*)\\.txt", "\\1", fnames_subjlist)
subjs_fmri <- rbindlist(lapply(fnames_subjlist, fread), idcol = "dmcc_session")
names(subjs_fmri)[2] <- "subj"
subjs_fmri <- separate(subjs_fmri, dmcc_session, c("dmcc", "session"))
subjs_fmri$wave <- ifelse(subjs_fmri$dmcc == "dmcc2", "wave1", ifelse(subjs_fmri$dmcc == "dmcc3", "wave2", "wave3"))
subjs_fmri$dmcc <- NULL
subjs_fmri <- as.data.table(subjs_fmri)
subjs_fmri$needs_rerun <- grepl("_", subjs_fmri$subj)
subjs_fmri$subj <- gsub("(.*[0-9])_(.*)", "\\1", subjs_fmri$subj)
## plot fmri and behav data
mat_fmri <- table(subjs_fmri$subj, paste0(subjs_fmri$wave, "_", subjs_fmri$session)) %>%
as.data.frame %>%
rename(subj = Var1, wave_session = Var2) %>%
mutate(id = "fmri")
mat_behav <- subjs_behav %>%
mutate(wave_session = paste0(wave, "_", session), Freq = nrow_eprime %in% c(216, 240), id = "behav") %>%
select(subj, wave_session, Freq, id)
mat <- full_join(mat_fmri, mat_behav)
## Joining, by = c("subj", "wave_session", "Freq", "id")
mat %>%
mutate(wave_session = gsub("wave([1-3])_(...).*", "w\\1_\\2", wave_session)) %>%
ggplot(aes(wave_session, subj, fill = Freq)) +
geom_raster() +
facet_grid(cols = vars(id)) +
scale_fill_gradient(low = "white", high = "black") +
theme(legend.position = "none") +
labs(x = "wave_session", y = "subj", title = "missing (white)")
## MB8 subjs
subjs_mb8 <- c("162026", "179245", "182840", "198855", "205220", "214524", "568963", "623844")
## twins
# from /data/nil-external/ccp/JosetAEtzel/DMCC_files/getPairs.Rsubjs_twins
subjs_mz <- data.table(
a = c(
"DMCC1624043", "DMCC1971064", "233326", "209228", "DMCC4260551", "DMCC6755891", "DMCC6904377", "DMCC9850294",
"162026", "DMCC8078683", "DMCC7297690", "DMCC6960387", "DMCC5065053", "DMCC5820265", "DMCC3206338"
),
b = c(
"DMCC3204738", "DMCC3509558", "352738", "765864", "DMCC7921988", "DMCC8050964", "DMCC9953810", "DMCC2560452",
"568963", "DMCC6484785", "DMCC1596165", "DMCC8260571", "DMCC4368773", "DMCC3091953", "DMCC4854984"
)
)
subjs_mz_mbsr <- data.table(
a = c(
"182436", "115825", "250427", "171330", "DMCC2834766", "DMCC2803654", "DMCC4191255", "562345",
"DMCC5195268", "DMCC6661074", "DMCC5775387", "DMCC6371570", "DMCC2442951", "DMCC1328342",
"DMCC3963378", "DMCC3876181", "DMCC5244053"
),
b = c(
"178647", "178243", "877168", "393550", "DMCC6671683", "DMCC9478705", "DMCC6418065", "130518",
"DMCC8033964", "DMCC6705371", "DMCC6721369", "DMCC9441378", "DMCC6627478", "DMCC5009144",
"DMCC2609759", "DMCC0472647", "DMCC8760894"
)
)
subjs_dz <- data.table(
a = c("198855", "DMCC8214059"),
b = c("623844", "DMCC3062542")
)
subjs_dz_mbsr <- data.table(
a = c("130114"),
b = c("155938")
)
subjs_twins <- rbindlist(list(mz = subjs_mz, mz = subjs_mz_mbsr, dz = subjs_dz, dz = subjs_dz_mbsr), idcol = "twin")
subjs_twins[, twinpair := 1:.N]
subjs_twins <- melt(subjs_twins, id.vars = c("twin", "twinpair"), value.name = "subj")[, -"variable"]
## poor quality
# from: https://github.com/ccplabwustl/R01/blob/master/Jo/datasetQC/blocksTasksToDrop.txt
subjs_jolist <- data.table(
rbind(
## DMCC2 for behavior ("sleeping"):
c("behavior", "wave1", "233326", "proactive"),
c("behavior", "wave1", "DMCC6371570", "reactive"),
# DMCC2 for movement (too much censoring):
c("movement", "wave1", "873968", "proactive"),
c("movement", "wave1", "DMCC1971064", "proactive"),
# DMCC3 for behavior ("sleeping"):
c("behavior", "wave2", "161832", "baseline")
)
)
names(subjs_jolist) <- c("issue", "wave", "subj", "session")
## bind
#subjs <- full_join(left_join(full_join(subjs_behav, subjs_fmri), subjs_twins), subjs_jolist)
subjs_with_fmri <- inner_join(subjs_behav, subjs_fmri, by = c("session", "subj", "wave"))
subjs_with_fmri <- left_join(subjs_with_fmri, subjs_twins, by = "subj") ## add twin info
subjs <- full_join(subjs_with_fmri, subjs_jolist, by = c("session", "subj", "wave")) ## add jolist
nrow(subjs) == nrow(subjs_with_fmri) ## TRUE if all subj*session*waves in jolist are in subjs
## [1] TRUE
subjs_samp <- subjs[nrow_eprime %in% c(216, 240)]
subjs[nrow_eprime == 0]
## subj session wave nrow_eprime needs_rerun twin twinpair issue
## 1: 171330 baseline wave1 0 FALSE mz 19 <NA>
## 2: 843151 baseline wave1 0 FALSE <NA> NA <NA>
## 3: DMCC8033964 baseline wave1 0 FALSE mz 24 <NA>
## 4: 182840 proactive wave1 0 FALSE <NA> NA <NA>
## 5: 250427 proactive wave1 0 FALSE mz 18 <NA>
## 6: DMCC1624043 proactive wave1 0 FALSE mz 1 <NA>
## 7: 250427 reactive wave1 0 FALSE mz 18 <NA>
## 8: 352738 reactive wave1 0 FALSE mz 3 <NA>
## 9: DMCC3204738 reactive wave1 0 FALSE mz 1 <NA>
## 10: DMCC7297690 reactive wave1 0 FALSE mz 11 <NA>
## 11: 130114 baseline wave2 0 FALSE dz 35 <NA>
## 12: DMCC6418065 baseline wave2 0 FALSE mz 22 <NA>
## 13: DMCC6671683 baseline wave2 0 FALSE mz 20 <NA>
## 14: 130114 proactive wave2 0 FALSE dz 35 <NA>
## 15: DMCC6418065 proactive wave2 0 FALSE mz 22 <NA>
## 16: DMCC6671683 proactive wave2 0 FALSE mz 20 <NA>
## 17: 130114 reactive wave2 0 FALSE dz 35 <NA>
## 18: DMCC6671683 reactive wave2 0 FALSE mz 20 <NA>
subjs[nrow_eprime > 0 & nrow_eprime < 216]
## subj session wave nrow_eprime needs_rerun twin twinpair issue
## 1: 178243 baseline wave1 189 FALSE mz 17 <NA>
kable(subjs_samp[subj %in% subjs_mb8])
| subj | session | wave | nrow_eprime | needs_rerun | twin | twinpair | issue |
|---|---|---|---|---|---|---|---|
| 162026 | baseline | wave1 | 216 | FALSE | mz | 9 | NA |
| 179245 | baseline | wave1 | 216 | FALSE | NA | NA | NA |
| 182840 | baseline | wave1 | 216 | FALSE | NA | NA | NA |
| 205220 | baseline | wave1 | 216 | FALSE | NA | NA | NA |
| 568963 | baseline | wave1 | 216 | FALSE | mz | 9 | NA |
| 623844 | baseline | wave1 | 216 | FALSE | dz | 33 | NA |
| 162026 | proactive | wave1 | 216 | FALSE | mz | 9 | NA |
| 179245 | proactive | wave1 | 216 | FALSE | NA | NA | NA |
| 205220 | proactive | wave1 | 216 | FALSE | NA | NA | NA |
| 568963 | proactive | wave1 | 216 | FALSE | mz | 9 | NA |
| 162026 | reactive | wave1 | 240 | FALSE | mz | 9 | NA |
| 179245 | reactive | wave1 | 240 | FALSE | NA | NA | NA |
| 182840 | reactive | wave1 | 240 | FALSE | NA | NA | NA |
| 205220 | reactive | wave1 | 240 | FALSE | NA | NA | NA |
| 568963 | reactive | wave1 | 240 | FALSE | mz | 9 | NA |
| 623844 | reactive | wave1 | 240 | FALSE | dz | 33 | NA |
| 568963 | baseline | wave2 | 216 | FALSE | mz | 9 | NA |
| 568963 | proactive | wave2 | 216 | FALSE | mz | 9 | NA |
| 568963 | reactive | wave2 | 240 | FALSE | mz | 9 | NA |
subjs_samp <- subjs_samp[!subj %in% subjs_mb8]
568963 was MB8 in wave 1, but MB4 in wave 2. Exclude this subj’s wave2 data for now (but could include later).
kable(subjs_samp[issue == "movement"])
| subj | session | wave | nrow_eprime | needs_rerun | twin | twinpair | issue |
|---|---|---|---|---|---|---|---|
| 873968 | proactive | wave1 | 216 | FALSE | NA | NA | movement |
| DMCC1971064 | proactive | wave1 | 216 | FALSE | mz | 2 | movement |
subjs_samp <- subjs_samp[is.na(issue) | issue != "movement"]
Two wave\(\cdot\)session\(\cdot\)subjs have excessive movements, as defined by Jo’s criteria (more than 20% of frames censored, i.e., FD > 0.9). Excluded.
d_rt <- d %>%
right_join(subjs_samp) %>%
filter(rt > 0, acc == 1)
## Joining, by = c("wave", "session", "subj")
d_rt %>%
ggplot(aes(rt, subj, fill = wave, color = wave)) +
geom_density_ridges(alpha = 0.2) +
facet_grid(cols = vars(session)) +
scale_fill_viridis_d() +
scale_color_viridis_d()
## Picking joint bandwidth of 43.5
## Picking joint bandwidth of 42.4
## Picking joint bandwidth of 42.6
l <- split(d_rt, droplevels(interaction(d_rt$subj, d_rt$wave, d_rt$session)))
d_rt$rt_resid <- unlist(lapply(l, function(x) resid(lm(rt ~ item, x))))
d_rt %>%
# filter(subj %in% "849971") %>%
ggplot(aes(sample = rt_resid)) +
geom_qq(size = 0.75, shape = 16) +
geom_qq_line() +
facet_grid(rows = vars(subj), cols = vars(wave, session))
Not much looks weird from this angle.
d_er <- d %>% right_join(subjs_samp)
## Joining, by = c("wave", "session", "subj")
sum_er <- d_er %>%
group_by(subj, session, wave) %>%
mutate(total = n()) %>%
group_by(subj, session, wave, acc_final) %>%
summarize(
total = unique(total),
percent = sum(n()) / total*100
)
## `summarise()` has grouped output by 'subj', 'session', 'wave'. You can override using the `.groups` argument.
sum_er %>%
filter(acc_final != "1") %>%
ggplot(aes(percent, subj, color = acc_final)) +
geom_point(size = 2) +
geom_vline(xintercept = 10, color = "grey60") +
facet_grid(cols = vars(wave, session)) +
scale_color_viridis_d() +
theme(legend.position = c(0.8, 0.1))
Line shows 10% threshold for “no response” used in Freund et al. (2021) JNeurosci.
bad <- sum_er %>% filter(acc_final == "no.response" & percent > 10)
bad
## # A tibble: 8 x 6
## # Groups: subj, session, wave [8]
## subj session wave acc_final total percent
## <chr> <chr> <chr> <chr> <int> <dbl>
## 1 160830 reactive wave1 no.response 240 15.4
## 2 161832 baseline wave2 no.response 216 37.0
## 3 161832 reactive wave2 no.response 240 27.9
## 4 197449 baseline wave1 no.response 216 17.1
## 5 197449 baseline wave3 no.response 216 13.4
## 6 233326 proactive wave1 no.response 216 14.4
## 7 DMCC5009144 baseline wave2 no.response 216 13.4
## 8 DMCC6371570 reactive wave1 no.response 240 32.5
sum(subjs_samp$issue == "behavior", na.rm = TRUE)
## [1] 3
subjs_samp <- subjs_samp %>%
filter(!paste0(subj, session, wave) %in% paste0(bad$subj, bad$session, bad$wave)) %>%
as.data.table
With that threshold, 3 wave\(\cdot\)session\(\cdot\)subjs don’t meet criteria. Three of those subjects were also ID’d by Jo’s (less strict) “sleeping” criteria. (Her criteria didn’t ID any others not picked up by the 10% threshold.)
mat_good_data <-
table(subjs_samp$subj, paste0(subjs_samp$wave, "_", subjs_samp$session)) %>%
as.data.frame %>%
rename(subj = Var1, wave_session = Var2)
x <- as.matrix(table(subjs_samp$subj, paste0(subjs_samp$wave, "_", subjs_samp$session)))
d <- dist(x, method = "euclidean") # distance matrix
fit <- hclust(d, method="ward.D")
# plot(fit)
groups <- cutree(fit, k = 5) # cut tree into 5 clusters
mat_good_data$subj <- factor(mat_good_data$subj, levels = names(sort(groups)))
mat_good_data %>%
mutate(wave_session = gsub("wave([1-3])_(...).*", "w\\1_\\2", wave_session)) %>%
ggplot(aes(wave_session, subj, fill = Freq)) +
geom_raster() +
scale_fill_gradient(low = "white", high = "black") +
theme(legend.position = "none") +
labs(x = "wave_session", y = "subj", title = "missing (white)")
twins_in_set <- function(x) x[duplicated(x)]
subjs_bp <- subjs_samp[wave == "wave1" & session %in% c("baseline", "proactive")]
subjs_both_b1p1 <- subjs_bp$subj[duplicated(subjs_bp$subj)] ## get subjs in both
subjs_bp <- subjs_bp[subj %in% subjs_both_b1p1]
n_b1p1 <- length(unique(subjs_bp$subj)) ## wave 1 n subjs
twins_b1p1 <- twins_in_set(subjs_bp[session == "baseline" & !is.na(twin)]$twinpair) ## wave 1 n twinpairs
n_twins_b1p1 <- length(twins_b1p1) ## wave 1 n twinpairs
n_b1p1 ## n total
## [1] 83
n_twins_b1p1 ## n twinpairs
## [1] 19
n_b1p1 - n_twins_b1p1 ## n unrelated
## [1] 64
subjs_bp2 <- subjs_samp[wave == "wave2" & session %in% c("baseline", "proactive")]
subjs_both_b2p2 <- subjs_bp2$subj[duplicated(subjs_bp2$subj)] ## get subjs in both
subjs_bp2 <- subjs_bp2[subj %in% subjs_both_b2p2]
n_b2p2 <- length(unique(subjs_bp2$subj)) ## wave 2 n subjs
twins_b2p2 <- twins_in_set(subjs_bp2[session == "baseline" & !is.na(twin)]$twinpair)
n_twins_b2p2 <- length(twins_b2p2) ## wave 1 n twinpairs
n_b2p2 ## n total
## [1] 40
n_twins_b2p2 ## n twinpairs
## [1] 13
n_b2p2 - n_twins_b2p2 ## n unrelated
## [1] 27
length(intersect(subjs_bp2$subj, subjs_bp$subj)) ## subjs in BP2 that were in BP2
## [1] 34
subjs_bp3 <- subjs_samp[wave == "wave3" & session %in% c("baseline", "proactive")]
subjs_both_b3p3 <- subjs_bp3$subj[duplicated(subjs_bp3$subj)] ## get subjs in both
subjs_bp3 <- subjs_bp3[subj %in% subjs_both_b3p3]
n_b3p3 <- length(unique(subjs_bp3$subj)) ## wave 3 n subjs
twins_b3p3 <- twins_in_set(subjs_bp3[session == "baseline" & !is.na(twin)]$twinpair) ## wave 1 n twinpairs
n_twins_b3p3 <- length(twins_b3p3) ## wave 1 n twinpairs
n_b3p3 ## n total
## [1] 5
n_twins_b3p3 ## n twinpairs
## [1] 1
n_b3p3 - n_twins_b3p3 ## n unrelated
## [1] 4
subjs_b2p1 <- subjs_samp[(wave == "wave1" & session %in% "proactive") | (wave == "wave2" & session %in% "baseline")]
subjs_both_b2p1 <- subjs_b2p1$subj[duplicated(subjs_b2p1$subj)] ## get subjs in both
subjs_b2p1 <- subjs_b2p1[subj %in% subjs_both_b2p1]
n_b2p1 <- length(unique(subjs_b2p1$subj)) ## wave 2 n subjs
twins_b2p1 <- twins_in_set(subjs_b2p1[session == "baseline" & !is.na(twin)]$twinpair)
n_twins_b2p1 <- length(twins_b2p1) ## wave 1 n twinpairs
n_b2p1 ## n total
## [1] 39
n_twins_b2p1 ## n twinpairs
## [1] 12
n_b2p1 - n_twins_b2p1 ## n unrelated
## [1] 27
subjs_b3p1 <- subjs_samp[(wave == "wave1" & session %in% "proactive") | (wave == "wave3" & session %in% "baseline")]
subjs_both_b3p1 <- subjs_b3p1$subj[duplicated(subjs_b3p1$subj)] ## get subjs in both
subjs_b3p1 <- subjs_b3p1[subj %in% subjs_both_b3p1]
n_b3p1 <- length(unique(subjs_b3p1$subj)) ## wave 2 n subjs
twins_b3p1 <- twins_in_set(subjs_b3p1[session == "baseline" & !is.na(twin)]$twinpair) ## wave 1 n twinpairs
n_twins_b3p1 <- length(twins_b3p1) ## wave 1 n twinpairs
n_b3p1 ## n total
## [1] 5
n_twins_b3p1 ## n twinpairs
## [1] 1
n_b3p1 - n_twins_b3p1 ## n unrelated
## [1] 4
subjs_r1 <- subjs_samp[wave == "wave1" & session == "reactive"]
n_r1 <- length(unique(subjs_r1$subj)) ## wave 1 n subjs
twins_r1 <- twins_in_set(subjs_r1[session == "reactive" & !is.na(twin)]$twinpair) ## wave 1 n twinpairs
n_twins_r1 <- length(twins_r1) ## wave 1 n twinpairs
n_r1 ## n total
## [1] 91
n_twins_r1 ## n twinpairs
## [1] 23
n_r1 - n_twins_r1 ## n unrelated
## [1] 68
subjs_r1r2 <- subjs_samp[session == "reactive" & wave %in% c("wave1", "wave2")]
subjs_both_r1r2 <- subjs_r1r2$subj[duplicated(subjs_r1r2$subj)] ## get subjs in both
subjs_r1r2 <- subjs_r1r2[subj %in% subjs_both_r1r2]
n_r1r2 <- length(unique(subjs_r1r2$subj)) ## wave 1 n subjs
twins_r1r2 <- twins_in_set(subjs_r1r2[wave == "wave1" & !is.na(twin)]$twinpair) ## wave 1 n twinpairs
n_twins_r1r2 <- length(twins_r1r2) ## wave 1 n twinpairs
n_r1r2 ## n total
## [1] 39
n_twins_r1r2 ## n twinpairs
## [1] 12
n_r1r2 - n_twins_r1r2 ## n unrelated
## [1] 27
out <- subjs_samp[wave %in% c("wave1", "wave2"), -c("nrow_eprime", "twin", "issue")]
sample_cotwins <- function(x) {
set.seed(0)
twinpairs <- unique(x$twinpair[duplicated(x$twinpair)])
x <- x %>% filter(twinpair %in% twinpairs) ## only twins in sample
x %>%
group_by(twinpair) %>%
sample_n(1) %>%
pull(subj)
}
cotwins_b1p1 <- sample_cotwins(subjs_twins[twinpair %in% twins_b1p1]) ## to exclude
out$is_mc_mi <- FALSE
out[
wave == "wave1" & session %in% c("baseline", "proactive") & subj %in% setdiff(subjs_both_b1p1, cotwins_b1p1)
]$is_mc_mi <- TRUE
out$is_mc_mi_cotwin <- FALSE
out[
wave == "wave1" & session %in% c("baseline", "proactive") & subj %in% cotwins_b1p1
]$is_mc_mi_cotwin <- TRUE
sum(out$is_mc_mi) / 2
## [1] 64
sum(out$is_mc_mi_cotwin) / 2
## [1] 19
### order control
cotwins_b2p1 <- sample_cotwins(subjs_twins[twinpair %in% twins_b2p1]) ## to exclude
out$is_mi_mc <- FALSE
out[
((wave == "wave2" & session %in% "baseline") | (wave == "wave1" & session %in% "proactive")) &
subj %in% setdiff(subjs_both_b2p1, cotwins_b2p1)
]$is_mi_mc <- TRUE
out$is_mi_mc_cotwin <- FALSE
out[
((wave == "wave2" & session %in% "baseline") | (wave == "wave1" & session %in% "proactive")) &
subj %in% cotwins_b2p1
]$is_mi_mc_cotwin <- TRUE
sum(out$is_mi_mc) / 2
## [1] 27
sum(out$is_mi_mc_cotwin) / 2
## [1] 12
## retest
subjs_bp_retest <- intersect(subjs_both_b2p2, subjs_both_b1p1)
cotwins_bp_retest <- sample_cotwins(subjs_twins[subj %in% subjs_bp_retest]) ## to exclude
out$is_mc_mi_retest <- FALSE
out[
session %in% c("baseline", "proactive") & subj %in% setdiff(subjs_bp_retest, cotwins_bp_retest)
]$is_mc_mi_retest <- TRUE
out$is_mc_mi_retest_cotwin <- FALSE
out[
session %in% c("baseline", "proactive") & subj %in% cotwins_bp_retest
]$is_mc_mi_retest_cotwin <- TRUE
sum(out$is_mc_mi_retest) / 4
## [1] 26
sum(out$is_mc_mi_retest_cotwin) / 4
## [1] 8
subjs_r_retest <- unique(subjs_r1r2$subj)
cotwins_r_retest <- sample_cotwins(subjs_twins[subj %in% subjs_r_retest]) ## to exclude
out$is_ispc_retest <- FALSE
out[
session %in% c("reactive") & subj %in% setdiff(subjs_r_retest, cotwins_r_retest)
]$is_ispc_retest <- TRUE
out$is_ispc_retest_cotwin <- FALSE
out[
session %in% c("reactive") & subj %in% cotwins_r_retest
]$is_ispc_retest_cotwin <- TRUE
sum(out$is_ispc_retest) / 2
## [1] 27
sum(out$is_ispc_retest_cotwin) / 2
## [1] 12
fwrite(out, here("out", "subjlist.csv"))